NOTE: Before starting this activity, please remember to clear your environment.
# Clear every object, including dot-prefixed hidden ones, from the global
# environment so the walkthrough starts from a clean slate.
# `all.names` is spelled out in full instead of relying on partial argument
# matching of `all`.
rm(list = ls(all.names = TRUE))
Read the dataset
Data pre-processing
Explore the dataset
# Use the setwd() function to get to the directory where the data is present
# (read.csv() resolves 'Cereals.csv' relative to the working directory).
# NOTE: the str() output shown below lists `name` as a Factor, i.e. it was
# produced with strings read as factors (the read.csv() default before R 4.0).
cereals_data <- read.csv('Cereals.csv', header = TRUE)
# Inspect the column types and the first few values of each column
str(cereals_data)
## 'data.frame': 77 obs. of 14 variables:
## $ name : Factor w/ 77 levels "100%_Bran","100%_Natural_Bran",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ calories: int 70 120 70 50 110 110 110 130 90 90 ...
## $ protein : int 4 3 4 4 2 2 2 3 2 3 ...
## $ fat : int 1 5 1 0 2 2 0 2 1 0 ...
## $ sodium : int 130 15 260 140 200 180 125 210 200 210 ...
## $ fiber : num 10 2 9 14 1 1.5 1 2 4 5 ...
## $ carbo : num 5 8 7 8 14 10.5 11 18 15 13 ...
## $ sugars : int 6 8 5 0 8 10 14 8 6 5 ...
## $ potass : int 280 135 320 330 NA 70 30 100 125 190 ...
## $ vitamins: int 25 0 25 25 25 25 25 25 25 25 ...
## $ shelf : int 3 3 3 3 3 1 2 3 1 3 ...
## $ weight : num 1 1 1 1 1 1 1 1.33 1 1 ...
## $ cups : num 0.33 1 0.33 0.5 0.75 0.75 1 0.75 0.67 0.67 ...
## $ rating : num 68.4 34 59.4 93.7 34.4 ...
# Per-column summary statistics; columns with missing values also report
# their NA count
summary(cereals_data)
## name calories protein
## 100%_Bran : 1 Min. : 50.0 Min. :1.000
## 100%_Natural_Bran : 1 1st Qu.:100.0 1st Qu.:2.000
## All-Bran : 1 Median :110.0 Median :3.000
## All-Bran_with_Extra_Fiber: 1 Mean :106.9 Mean :2.545
## Almond_Delight : 1 3rd Qu.:110.0 3rd Qu.:3.000
## Apple_Cinnamon_Cheerios : 1 Max. :160.0 Max. :6.000
## (Other) :71
## fat sodium fiber carbo
## Min. :0.000 Min. : 0.0 Min. : 0.000 Min. : 5.0
## 1st Qu.:0.000 1st Qu.:130.0 1st Qu.: 1.000 1st Qu.:12.0
## Median :1.000 Median :180.0 Median : 2.000 Median :14.5
## Mean :1.013 Mean :159.7 Mean : 2.152 Mean :14.8
## 3rd Qu.:2.000 3rd Qu.:210.0 3rd Qu.: 3.000 3rd Qu.:17.0
## Max. :5.000 Max. :320.0 Max. :14.000 Max. :23.0
## NA's :1
## sugars potass vitamins shelf
## Min. : 0.000 Min. : 15.00 Min. : 0.00 Min. :1.000
## 1st Qu.: 3.000 1st Qu.: 42.50 1st Qu.: 25.00 1st Qu.:1.000
## Median : 7.000 Median : 90.00 Median : 25.00 Median :2.000
## Mean : 7.026 Mean : 98.67 Mean : 28.25 Mean :2.208
## 3rd Qu.:11.000 3rd Qu.:120.00 3rd Qu.: 25.00 3rd Qu.:3.000
## Max. :15.000 Max. :330.00 Max. :100.00 Max. :3.000
## NA's :1 NA's :2
## weight cups rating
## Min. :0.50 Min. :0.250 Min. :18.04
## 1st Qu.:1.00 1st Qu.:0.670 1st Qu.:33.17
## Median :1.00 Median :0.750 Median :40.40
## Mean :1.03 Mean :0.821 Mean :42.67
## 3rd Qu.:1.00 3rd Qu.:1.000 3rd Qu.:50.83
## Max. :1.50 Max. :1.500 Max. :93.70
##
The dataset has 77 observations of 14 variables.
The column/variable names are self-explanatory.
# See the head and tail of the data frame; head() prints the first rows
head(cereals_data)
## name calories protein fat sodium fiber carbo sugars
## 1 100%_Bran 70 4 1 130 10.0 5.0 6
## 2 100%_Natural_Bran 120 3 5 15 2.0 8.0 8
## 3 All-Bran 70 4 1 260 9.0 7.0 5
## 4 All-Bran_with_Extra_Fiber 50 4 0 140 14.0 8.0 0
## 5 Almond_Delight 110 2 2 200 1.0 14.0 8
## 6 Apple_Cinnamon_Cheerios 110 2 2 180 1.5 10.5 10
## potass vitamins shelf weight cups rating
## 1 280 25 3 1 0.33 68.40297
## 2 135 0 3 1 1.00 33.98368
## 3 320 25 3 1 0.33 59.42551
## 4 330 25 3 1 0.50 93.70491
## 5 NA 25 3 1 0.75 34.38484
## 6 70 25 1 1 0.75 29.50954
# tail() prints the last rows of the data frame
tail(cereals_data)
## name calories protein fat sodium fiber carbo sugars
## 72 Total_Whole_Grain 100 3 1 200 3 16 3
## 73 Triples 110 2 1 250 0 21 3
## 74 Trix 110 1 1 140 0 13 12
## 75 Wheat_Chex 100 3 1 230 3 17 3
## 76 Wheaties 100 3 1 200 3 17 3
## 77 Wheaties_Honey_Gold 110 2 1 200 1 16 8
## potass vitamins shelf weight cups rating
## 72 110 100 3 1 1.00 46.65884
## 73 60 25 3 1 0.75 39.10617
## 74 25 25 2 1 1.00 27.75330
## 75 115 25 1 1 0.67 49.78744
## 76 110 25 1 1 1.00 51.59219
## 77 60 25 1 1 0.75 36.18756
# Store all column names in a variable called 'attr'.
# NOTE: this name is the same as the base function attr(). Calls such as
# attr(x, "names") still resolve to the function, and the variable name is
# kept because the numerical-attribute step further down refers to it.
attr <- colnames(cereals_data)
attr
## [1] "name" "calories" "protein" "fat" "sodium" "fiber"
## [7] "carbo" "sugars" "potass" "vitamins" "shelf" "weight"
## [13] "cups" "rating"
# Store all categorical attributes in 'cat_Attr'
cat_Attr <- "shelf"
# The numerical attributes are every column that is neither categorical nor
# the 'name' identifier column
num_Attr <- setdiff(attr, c(cat_Attr, "name"))
num_Attr
## [1] "calories" "protein" "fat" "sodium" "fiber" "carbo"
## [7] "sugars" "potass" "vitamins" "weight" "cups" "rating"
# Convert 'shelf' from integer codes to a factor so that it is treated as a
# categorical attribute rather than a numeric one
cereals_data$shelf <- as.factor(as.character(cereals_data$shelf))
# Now see the structure of the data frame
str(cereals_data)
## 'data.frame': 77 obs. of 14 variables:
## $ name : Factor w/ 77 levels "100%_Bran","100%_Natural_Bran",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ calories: int 70 120 70 50 110 110 110 130 90 90 ...
## $ protein : int 4 3 4 4 2 2 2 3 2 3 ...
## $ fat : int 1 5 1 0 2 2 0 2 1 0 ...
## $ sodium : int 130 15 260 140 200 180 125 210 200 210 ...
## $ fiber : num 10 2 9 14 1 1.5 1 2 4 5 ...
## $ carbo : num 5 8 7 8 14 10.5 11 18 15 13 ...
## $ sugars : int 6 8 5 0 8 10 14 8 6 5 ...
## $ potass : int 280 135 320 330 NA 70 30 100 125 190 ...
## $ vitamins: int 25 0 25 25 25 25 25 25 25 25 ...
## $ shelf : Factor w/ 3 levels "1","2","3": 3 3 3 3 3 1 2 3 1 3 ...
## $ weight : num 1 1 1 1 1 1 1 1.33 1 1 ...
## $ cups : num 0.33 1 0.33 0.5 0.75 0.75 1 0.75 0.67 0.67 ...
## $ rating : num 68.4 34 59.4 93.7 34.4 ...
# Use the cereal names as row names so they label each observation
rownames(cereals_data) <- cereals_data$name
# Then drop the now-redundant 'name' column, either with a logical column mask:
# cereals_data <- cereals_data[, !(colnames(cereals_data) %in% "name")]
# (or)
cereals_data$name <- NULL
# Total number of missing values in the data frame
sum(is.na(cereals_data))
## [1] 4
# DMwR provides knnImputation(), used below to fill in the missing values
library(DMwR)
## Loading required package: lattice
## Loading required package: grid
# Check the categorical column for missing values; only the numeric
# attributes are passed to the imputation below
sum(is.na(cereals_data$shelf))
## [1] 0
# Impute the missing numeric values from the 3 nearest neighbours
cereals_data[, num_Attr] <- knnImputation(cereals_data[, num_Attr], k = 3, scale = TRUE)
# Confirm that no missing values remain
sum(is.na(cereals_data))
## [1] 0
# How do you find missing values per column?
# colSums() over the logical NA mask counts the NAs in each column.
colSums(is.na(cereals_data))
## calories protein fat sodium fiber carbo sugars potass
## 0 0 0 0 0 0 0 0
## vitamins shelf weight cups rating
## 0 0 0 0 0
# Make a copy of the data frame for later use (mixed attributes): at this
# point 'shelf' is still a single factor column
cereals_data_copy <- cereals_data
library("dummies")
## dummies-1.5.6 provided by Decision Patterns
# Build one 0/1 indicator column per level of 'shelf'
shelfDummies <- data.frame(dummy(cereals_data$shelf))
# Name the new attributes appropriately
names(shelfDummies) <- c("Shelf1", "Shelf2", "Shelf3")
head(shelfDummies)
## Shelf1 Shelf2 Shelf3
## 1 0 0 1
## 2 0 0 1
## 3 0 0 1
## 4 0 0 1
## 5 0 0 1
## 6 1 0 0
# Replace the 'shelf' factor column with its dummy-coded columns
cereals_data$shelf <- NULL
cereals_data <- data.frame(cbind(cereals_data, shelfDummies))
# Check the data frame using head()
head(cereals_data)
## calories protein fat sodium fiber carbo sugars
## 100%_Bran 70 4 1 130 10.0 5.0 6
## 100%_Natural_Bran 120 3 5 15 2.0 8.0 8
## All-Bran 70 4 1 260 9.0 7.0 5
## All-Bran_with_Extra_Fiber 50 4 0 140 14.0 8.0 0
## Almond_Delight 110 2 2 200 1.0 14.0 8
## Apple_Cinnamon_Cheerios 110 2 2 180 1.5 10.5 10
## potass vitamins weight cups rating Shelf1
## 100%_Bran 280.00000 25 1 0.33 68.40297 0
## 100%_Natural_Bran 135.00000 0 1 1.00 33.98368 0
## All-Bran 320.00000 25 1 0.33 59.42551 0
## All-Bran_with_Extra_Fiber 330.00000 25 1 0.50 93.70491 0
## Almond_Delight 57.84562 25 1 0.75 34.38484 0
## Apple_Cinnamon_Cheerios 70.00000 25 1 0.75 29.50954 1
## Shelf2 Shelf3
## 100%_Bran 0 1
## 100%_Natural_Bran 0 1
## All-Bran 0 1
## All-Bran_with_Extra_Fiber 0 1
## Almond_Delight 0 1
## Apple_Cinnamon_Cheerios 0 0
# Centre and scale the numeric attributes; the 0/1 shelf dummy columns are
# not part of num_Attr and are therefore left as they are
cereals_data[, num_Attr] <- scale(cereals_data[, num_Attr], center = TRUE, scale = TRUE)
# if install.packages("factoextra") doesn't work, use the following
# if(!require(devtools)) install.packages("devtools")
# devtools::install_github("kassambara/factoextra")
library(factoextra)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.4.4
## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ
# Inter-observation distances, computed with factoextra's get_dist()
distance <- get_dist(cereals_data)
# fviz_dist() draws the distance matrix; `gradient` sets the colours used for
# the low, mid and high ends of the distance scale
fviz_dist(distance, gradient = list(low = "#00AFBB", mid = "white", high = "#FC4E07"))
# Euclidean distance is used here (all attributes are numerical now)
dist <- dist(cereals_data, method = "euclidean")
# ward.D2 linkage: merge the pair of clusters that gives the minimum increase
# in total within-cluster variance
hc_fit <- hclust(dist, method = "ward.D2")
plot(hc_fit)
# Outline the 6-cluster cut on the dendrogram with red borders
rect.hclust(hc_fit, k = 6, border = "red")
points_hc <- cutree(hc_fit, k = 6)
# Keep the cluster labels next to the cereals data, as a first column that is
# named while binding
cereals_clusts_hc <- cbind(cluster_hc = points_hc, cereals_data)
# Have a look at the head of the new data frame
head(cereals_clusts_hc)
## cluster_hc calories protein fat
## 100%_Bran 1 -1.8929836 1.3286071 -0.01290349
## 100%_Natural_Bran 2 0.6732089 0.4151897 3.96137277
## All-Bran 1 -1.8929836 1.3286071 -0.01290349
## All-Bran_with_Extra_Fiber 1 -2.9194605 1.3286071 -1.00647256
## Almond_Delight 3 0.1599704 -0.4982277 0.98066557
## Apple_Cinnamon_Cheerios 3 0.1599704 -0.4982277 0.98066557
## sodium fiber carbo sugars
## 100%_Bran -0.3539844 3.29284661 -2.5214018 -0.2298253
## 100%_Natural_Bran -1.7257708 -0.06375361 -1.7487994 0.2293718
## All-Bran 1.1967306 2.87327158 -2.0063336 -0.4594238
## All-Bran_with_Extra_Fiber -0.2346986 4.97114672 -1.7487994 -1.6074165
## Almond_Delight 0.4810160 -0.48332864 -0.2035948 0.2293718
## Apple_Cinnamon_Cheerios 0.2424445 -0.27354112 -1.1049642 0.6885689
## potass vitamins weight cups
## 100%_Bran 2.6078060 -0.1453172 -0.1967771 -2.1100340
## 100%_Natural_Bran 0.5262315 -1.2642598 -0.1967771 0.7690100
## All-Bran 3.1820334 -0.1453172 -0.1967771 -2.1100340
## All-Bran_with_Extra_Fiber 3.3255903 -0.1453172 -0.1967771 -1.3795303
## Almond_Delight -0.5813726 -0.1453172 -0.1967771 -0.3052601
## Apple_Cinnamon_Cheerios -0.4068881 -0.1453172 -0.1967771 -0.3052601
## rating Shelf1 Shelf2 Shelf3
## 100%_Bran 1.8321876 0 0 1
## 100%_Natural_Bran -0.6180571 0 0 1
## All-Bran 1.1930986 0 0 1
## All-Bran_with_Extra_Fiber 3.6333849 0 0 1
## Almond_Delight -0.5894990 0 0 1
## Apple_Cinnamon_Cheerios -0.9365625 1 0 0
Silhouette width
library(cluster)
dist <- daisy(x = cereals_data, metric = "euclidean")
## Warning in daisy(x = cereals_data, metric = "euclidean"): binary
## variable(s) 13, 14, 15 treated as interval scaled
# Silhouette plot of the current hierarchical partition (points_hc)
sil_value <- silhouette(points_hc, dist = dist)
plot(sil_value)
# Average silhouette width for k = 2..20 cuts of the dendrogram.
# Each cut is computed inside the helper, so `points_hc` keeps the partition
# it held before this step instead of ending up as the k = 20 cut.
# Position 1 (k = 1) holds the placeholder 0.
sil_value_hc <- c(0, vapply(2:20, function(k) {
  mean(silhouette(cutree(hc_fit, k = k), dist = dist)[, 3])
}, numeric(1)))
plot(1:20, sil_value_hc, type = "b", xlab = "No: of Clusters", ylab = "Silhouette Width")
library(fpc)
## Warning: package 'fpc' was built under R version 3.4.4
# Input the scaled cereals_data.
# Bootstrap stability check of the 6-cluster ward.D2 solution. A fixed seed
# makes the resampling, and therefore the stability figures, repeatable
# between runs, in line with the k-means stability check later on.
hclust_stability <- clusterboot(cereals_data, clustermethod = hclustCBI, method = "ward.D2", k = 6, seed = 123, count = FALSE)
hclust_stability
## * Cluster stability assessment *
## Cluster method: hclust/cutree
## Full clustering results are given as parameter result
## of the clusterboot object, which also provides further statistics
## of the resampling results.
## Number of resampling runs: 100
##
## Number of clusters found in data: 6
##
## Clusterwise Jaccard bootstrap (omitting multiple points) mean:
## [1] 0.7946011 0.7074372 0.6283461 0.9518663 0.5833768 0.5129733
## dissolved:
## [1] 22 13 25 2 38 56
## recovered:
## [1] 78 35 18 88 13 14
# Cluster label of each observation, taken from the clustering result stored
# in the clusterboot object
clusters <- hclust_stability$result$partition
# Cluster stability values
hclust_stability$bootmean
## [1] 0.7946011 0.7074372 0.6283461 0.9518663 0.5833768 0.5129733
# Cluster dissolution rate. If the maximum Jaccard coefficient is < 0.5, that
# cluster is assumed to be dissolved. The code below shows the number of times
# each cluster was dissolved. The lower the value, the better.
hclust_stability$bootbrd
## [1] 22 13 25 2 38 56
# Scaling the numeric attributes of the mixed-attribute copy
cereals_data_copy[, num_Attr] <- scale(cereals_data_copy[, num_Attr], scale = TRUE, center = TRUE)
# Calculating Gower distance on the mixed (numeric + factor) data
library(cluster)
gower_dist <- daisy(cereals_data_copy, metric = "gower")
head(gower_dist)
## [1] 0.33742882 0.06931482 0.14573742 0.30380269 0.37045842 0.40646494
# Check the class of the distance object before passing it to hclust()
class(gower_dist)
## [1] "dissimilarity" "dist"
# With the Gower dissimilarities in hand, run hclust() on them
hc_fit_mixed <- hclust(gower_dist, method = "ward.D2")
plot(hc_fit_mixed)
points_hc_mixed <- cutree(hc_fit_mixed, k = 6)
# Put the cluster labels in front of the cereals data, naming the label
# column as part of the bind
cereals_clusts_hc_mixed <- cbind(cluster_hc_mixed = points_hc_mixed, cereals_data)
# Have a look at the head of the new data frame
head(cereals_clusts_hc_mixed)
## cluster_hc_mixed calories protein
## 100%_Bran 1 -1.8929836 1.3286071
## 100%_Natural_Bran 2 0.6732089 0.4151897
## All-Bran 1 -1.8929836 1.3286071
## All-Bran_with_Extra_Fiber 1 -2.9194605 1.3286071
## Almond_Delight 2 0.1599704 -0.4982277
## Apple_Cinnamon_Cheerios 3 0.1599704 -0.4982277
## fat sodium fiber carbo
## 100%_Bran -0.01290349 -0.3539844 3.29284661 -2.5214018
## 100%_Natural_Bran 3.96137277 -1.7257708 -0.06375361 -1.7487994
## All-Bran -0.01290349 1.1967306 2.87327158 -2.0063336
## All-Bran_with_Extra_Fiber -1.00647256 -0.2346986 4.97114672 -1.7487994
## Almond_Delight 0.98066557 0.4810160 -0.48332864 -0.2035948
## Apple_Cinnamon_Cheerios 0.98066557 0.2424445 -0.27354112 -1.1049642
## sugars potass vitamins weight
## 100%_Bran -0.2298253 2.6078060 -0.1453172 -0.1967771
## 100%_Natural_Bran 0.2293718 0.5262315 -1.2642598 -0.1967771
## All-Bran -0.4594238 3.1820334 -0.1453172 -0.1967771
## All-Bran_with_Extra_Fiber -1.6074165 3.3255903 -0.1453172 -0.1967771
## Almond_Delight 0.2293718 -0.5813726 -0.1453172 -0.1967771
## Apple_Cinnamon_Cheerios 0.6885689 -0.4068881 -0.1453172 -0.1967771
## cups rating Shelf1 Shelf2 Shelf3
## 100%_Bran -2.1100340 1.8321876 0 0 1
## 100%_Natural_Bran 0.7690100 -0.6180571 0 0 1
## All-Bran -2.1100340 1.1930986 0 0 1
## All-Bran_with_Extra_Fiber -1.3795303 3.6333849 0 0 1
## Almond_Delight -0.3052601 -0.5894990 0 0 1
## Apple_Cinnamon_Cheerios -0.3052601 -0.9365625 1 0 0
plot(hc_fit_mixed)
# Outline the same 6-cluster cut that produced points_hc_mixed, so the boxes
# on the dendrogram match the partition evaluated below
rect.hclust(hc_fit_mixed, k = 6, border = "red")
library(cluster)
gower_dist <- daisy(x = cereals_data_copy, metric = "gower")
# Silhouette of the 6-cluster solution under the Gower dissimilarity
sil_value_hc_mixed <- silhouette(points_hc_mixed, dist = gower_dist)
plot(sil_value_hc_mixed)
# Fix the RNG seed so the randomly chosen starting centers are repeatable
set.seed(123)
# Basic k-means with 2 centers and 20 random starting configurations
km_basic <- kmeans(cereals_data, centers = 2, nstart = 20)
# Inspect the components of the returned object
str(km_basic)
## List of 9
## $ cluster : Named int [1:77] 2 1 2 2 1 1 1 1 2 2 ...
## ..- attr(*, "names")= chr [1:77] "100%_Bran" "100%_Natural_Bran" "All-Bran" "All-Bran_with_Extra_Fiber" ...
## $ centers : num [1:2, 1:15] 0.432 -0.847 -0.337 0.661 0.182 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "1" "2"
## .. ..$ : chr [1:15] "calories" "protein" "fat" "sodium" ...
## $ totss : num 961
## $ withinss : num [1:2] 478 300
## $ tot.withinss: num 778
## $ betweenss : num 183
## $ size : int [1:2] 51 26
## $ iter : int 1
## $ ifault : int 0
## - attr(*, "class")= chr "kmeans"
# Visualise the 2-cluster k-means result
fviz_cluster(km_basic, cereals_data)
The kmeans() function returns a list of 9 objects which include the cluster assignments (“cluster”), cluster centers (“centers”), etc. You can further explore the returned object by calling the str() function on it and going through the documentation.
Let’s now build a scree plot to choose a “k”.
# Total within-cluster sum of squares for k = 1 up to 20 cluster centers.
# The models are fitted in increasing order of k, one kmeans() call per k.
wss <- vapply(1:20, function(k) {
  # Fit k-means with k centers and return the sum of its within-cluster
  # sums of squares
  sum(kmeans(cereals_data, centers = k, nstart = 20)$withinss)
}, numeric(1))
plot(1:20, wss, type = "b")
# Within-sum-of-squares plot from factoextra, drawn with a fixed seed
set.seed(123)
fviz_nbclust(cereals_data, kmeans, method = "wss")
# Re-seed, then fit the k-means model with k chosen as 6.
# 'nstart' is deliberately not used, for reproducibility during the cluster
# stability check: the initial centroids are chosen based on the seed set here.
set.seed(123)
km_clust <- kmeans(cereals_data, centers = 6)
# Cluster assignment of each observation
km_points <- km_clust$cluster
# Store the cluster assignments along with the data in a new data frame
cereals_clusts_km <- cbind(km_clust$cluster, cereals_data)
# Look at the head of the data
head(cereals_clusts_km)
## km_clust$cluster calories protein
## 100%_Bran 6 -1.8929836 1.3286071
## 100%_Natural_Bran 2 0.6732089 0.4151897
## All-Bran 6 -1.8929836 1.3286071
## All-Bran_with_Extra_Fiber 6 -2.9194605 1.3286071
## Almond_Delight 3 0.1599704 -0.4982277
## Apple_Cinnamon_Cheerios 3 0.1599704 -0.4982277
## fat sodium fiber carbo
## 100%_Bran -0.01290349 -0.3539844 3.29284661 -2.5214018
## 100%_Natural_Bran 3.96137277 -1.7257708 -0.06375361 -1.7487994
## All-Bran -0.01290349 1.1967306 2.87327158 -2.0063336
## All-Bran_with_Extra_Fiber -1.00647256 -0.2346986 4.97114672 -1.7487994
## Almond_Delight 0.98066557 0.4810160 -0.48332864 -0.2035948
## Apple_Cinnamon_Cheerios 0.98066557 0.2424445 -0.27354112 -1.1049642
## sugars potass vitamins weight
## 100%_Bran -0.2298253 2.6078060 -0.1453172 -0.1967771
## 100%_Natural_Bran 0.2293718 0.5262315 -1.2642598 -0.1967771
## All-Bran -0.4594238 3.1820334 -0.1453172 -0.1967771
## All-Bran_with_Extra_Fiber -1.6074165 3.3255903 -0.1453172 -0.1967771
## Almond_Delight 0.2293718 -0.5813726 -0.1453172 -0.1967771
## Apple_Cinnamon_Cheerios 0.6885689 -0.4068881 -0.1453172 -0.1967771
## cups rating Shelf1 Shelf2 Shelf3
## 100%_Bran -2.1100340 1.8321876 0 0 1
## 100%_Natural_Bran 0.7690100 -0.6180571 0 0 1
## All-Bran -2.1100340 1.1930986 0 0 1
## All-Bran_with_Extra_Fiber -1.3795303 3.6333849 0 0 1
## Almond_Delight -0.3052601 -0.5894990 0 0 1
## Apple_Cinnamon_Cheerios -0.3052601 -0.9365625 1 0 0
# Give the cluster-assignment column a readable name
colnames(cereals_clusts_km)[1] <- "cluster_km"
fviz_cluster(km_clust, cereals_data)
library(cluster)
dist <- daisy(x = cereals_data, metric = "euclidean")
## Warning in daisy(x = cereals_data, metric = "euclidean"): binary
## variable(s) 13, 14, 15 treated as interval scaled
# Silhouette plot of the 6-cluster k-means solution
sil_value <- silhouette(km_clust$cluster, dist = dist)
plot(sil_value)
library(fpc)
# Input the scaled cereals_data
# Input the same seed used above for reproducibility of the clustering performed
km_stability <- clusterboot(cereals_data, clustermethod = kmeansCBI, krange = 6, seed = 123, count = FALSE)
km_stability
## * Cluster stability assessment *
## Cluster method: kmeans
## Full clustering results are given as parameter result
## of the clusterboot object, which also provides further statistics
## of the resampling results.
## Number of resampling runs: 100
##
## Number of clusters found in data: 6
##
## Clusterwise Jaccard bootstrap (omitting multiple points) mean:
## [1] 0.4634986 0.6287747 0.9003312 0.6773105 0.6106193 0.7117928
## dissolved:
## [1] 68 33 2 34 35 33
## recovered:
## [1] 28 23 81 43 25 63
# First component of the stored clustering result: the fitted k-means object
km_stability$result[1]
## $result
## K-means clustering with 6 clusters of sizes 6, 17, 22, 11, 18, 3
##
## Cluster means:
## calories protein fat sodium fiber carbo
## 1 0.5021294 0.1107173 -0.17849834 0.58042081 -0.13368278 0.8694640
## 2 0.8543519 0.6301115 1.09755605 -0.08032881 0.42986407 -0.2793401
## 3 0.2066285 -0.9134174 0.03225874 0.12315869 -0.65497296 -0.5898959
## 4 -1.0997968 0.1660759 -0.73549918 -1.80167990 -0.03705338 0.2382566
## 5 -0.1821886 0.1614627 -0.50968803 0.79911139 -0.15699250 0.8980789
## 6 -2.2351425 1.3286071 -0.34409318 0.20268253 3.71242164 -2.0921783
## sugars potass vitamins weight cups rating
## 1 -0.1532925 -0.03603288 3.2115106 0.6892953 0.5899650 -0.3251413
## 2 0.4724761 0.77112261 -0.2111374 0.8391457 -0.6591374 -0.2568016
## 3 0.9912215 -0.72477153 -0.1453172 -0.1967771 0.2084363 -0.9784393
## 4 -1.0631418 -0.05505424 -0.8573716 -0.9036212 0.1088222 1.3797876
## 5 -0.8293326 -0.30320814 -0.1453172 -0.1967771 0.4156945 0.3336558
## 6 -0.7655552 3.03847656 -0.1453172 -0.1967771 -1.8665328 2.2195570
## Shelf1 Shelf2 Shelf3
## 1 0.0000000 0.00000000 1.00000000
## 2 0.0000000 0.11764706 0.88235294
## 3 0.2727273 0.63636364 0.09090909
## 4 0.3636364 0.36363636 0.27272727
## 5 0.5555556 0.05555556 0.38888889
## 6 0.0000000 0.00000000 1.00000000
##
## Clustering vector:
## 100%_Bran
## 6
## 100%_Natural_Bran
## 2
## All-Bran
## 6
## All-Bran_with_Extra_Fiber
## 6
## Almond_Delight
## 3
## Apple_Cinnamon_Cheerios
## 3
## Apple_Jacks
## 3
## Basic_4
## 2
## Bran_Chex
## 5
## Bran_Flakes
## 5
## Cap'n'Crunch
## 3
## Cheerios
## 5
## Cinnamon_Toast_Crunch
## 3
## Clusters
## 2
## Cocoa_Puffs
## 3
## Corn_Chex
## 5
## Corn_Flakes
## 5
## Corn_Pops
## 3
## Count_Chocula
## 3
## Cracklin'_Oat_Bran
## 2
## Cream_of_Wheat_(Quick)
## 4
## Crispix
## 5
## Crispy_Wheat_&_Raisins
## 3
## Double_Chex
## 5
## Froot_Loops
## 3
## Frosted_Flakes
## 3
## Frosted_Mini-Wheats
## 4
## Fruit_&_Fibre_Dates,_Walnuts,_and_Oats
## 2
## Fruitful_Bran
## 2
## Fruity_Pebbles
## 3
## Golden_Crisp
## 3
## Golden_Grahams
## 3
## Grape_Nuts_Flakes
## 5
## Grape-Nuts
## 5
## Great_Grains_Pecan
## 2
## Honey_Graham_Ohs
## 3
## Honey_Nut_Cheerios
## 3
## Honey-comb
## 3
## Just_Right_Crunchy__Nuggets
## 1
## Just_Right_Fruit_&_Nut
## 1
## Kix
## 5
## Life
## 2
## Lucky_Charms
## 3
## Maypo
## 4
## Muesli_Raisins,_Dates,_&_Almonds
## 2
## Muesli_Raisins,_Peaches,_&_Pecans
## 2
## Mueslix_Crispy_Blend
## 2
## Multi-Grain_Cheerios
## 5
## Nut&Honey_Crunch
## 3
## Nutri-Grain_Almond-Raisin
## 2
## Nutri-grain_Wheat
## 5
## Oatmeal_Raisin_Crisp
## 2
## Post_Nat._Raisin_Bran
## 2
## Product_19
## 1
## Puffed_Rice
## 4
## Puffed_Wheat
## 4
## Quaker_Oat_Squares
## 2
## Quaker_Oatmeal
## 4
## Raisin_Bran
## 2
## Raisin_Nut_Bran
## 2
## Raisin_Squares
## 4
## Rice_Chex
## 5
## Rice_Krispies
## 5
## Shredded_Wheat
## 4
## Shredded_Wheat_'n'Bran
## 4
## Shredded_Wheat_spoon_size
## 4
## Smacks
## 3
## Special_K
## 5
## Strawberry_Fruit_Wheats
## 4
## Total_Corn_Flakes
## 1
## Total_Raisin_Bran
## 1
## Total_Whole_Grain
## 1
## Triples
## 5
## Trix
## 3
## Wheat_Chex
## 5
## Wheaties
## 5
## Wheaties_Honey_Gold
## 3
##
## Within cluster sum of squares by cluster:
## [1] 35.44378 113.77594 67.35068 79.04588 99.87212 10.56030
## (between_SS / total_SS = 57.8 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss"
## [5] "tot.withinss" "betweenss" "size" "iter"
## [9] "ifault" "crit" "bestk"
# Cluster label of every cereal, from the clustering stored in km_stability
groups_km <- km_stability$result$partition
groups_km
## 100%_Bran
## 6
## 100%_Natural_Bran
## 2
## All-Bran
## 6
## All-Bran_with_Extra_Fiber
## 6
## Almond_Delight
## 3
## Apple_Cinnamon_Cheerios
## 3
## Apple_Jacks
## 3
## Basic_4
## 2
## Bran_Chex
## 5
## Bran_Flakes
## 5
## Cap'n'Crunch
## 3
## Cheerios
## 5
## Cinnamon_Toast_Crunch
## 3
## Clusters
## 2
## Cocoa_Puffs
## 3
## Corn_Chex
## 5
## Corn_Flakes
## 5
## Corn_Pops
## 3
## Count_Chocula
## 3
## Cracklin'_Oat_Bran
## 2
## Cream_of_Wheat_(Quick)
## 4
## Crispix
## 5
## Crispy_Wheat_&_Raisins
## 3
## Double_Chex
## 5
## Froot_Loops
## 3
## Frosted_Flakes
## 3
## Frosted_Mini-Wheats
## 4
## Fruit_&_Fibre_Dates,_Walnuts,_and_Oats
## 2
## Fruitful_Bran
## 2
## Fruity_Pebbles
## 3
## Golden_Crisp
## 3
## Golden_Grahams
## 3
## Grape_Nuts_Flakes
## 5
## Grape-Nuts
## 5
## Great_Grains_Pecan
## 2
## Honey_Graham_Ohs
## 3
## Honey_Nut_Cheerios
## 3
## Honey-comb
## 3
## Just_Right_Crunchy__Nuggets
## 1
## Just_Right_Fruit_&_Nut
## 1
## Kix
## 5
## Life
## 2
## Lucky_Charms
## 3
## Maypo
## 4
## Muesli_Raisins,_Dates,_&_Almonds
## 2
## Muesli_Raisins,_Peaches,_&_Pecans
## 2
## Mueslix_Crispy_Blend
## 2
## Multi-Grain_Cheerios
## 5
## Nut&Honey_Crunch
## 3
## Nutri-Grain_Almond-Raisin
## 2
## Nutri-grain_Wheat
## 5
## Oatmeal_Raisin_Crisp
## 2
## Post_Nat._Raisin_Bran
## 2
## Product_19
## 1
## Puffed_Rice
## 4
## Puffed_Wheat
## 4
## Quaker_Oat_Squares
## 2
## Quaker_Oatmeal
## 4
## Raisin_Bran
## 2
## Raisin_Nut_Bran
## 2
## Raisin_Squares
## 4
## Rice_Chex
## 5
## Rice_Krispies
## 5
## Shredded_Wheat
## 4
## Shredded_Wheat_'n'Bran
## 4
## Shredded_Wheat_spoon_size
## 4
## Smacks
## 3
## Special_K
## 5
## Strawberry_Fruit_Wheats
## 4
## Total_Corn_Flakes
## 1
## Total_Raisin_Bran
## 1
## Total_Whole_Grain
## 1
## Triples
## 5
## Trix
## 3
## Wheat_Chex
## 5
## Wheaties
## 5
## Wheaties_Honey_Gold
## 3
# Cluster stability values: the clusterwise Jaccard bootstrap means reported
# in the km_stability printout
km_stability$bootmean
## [1] 0.4634986 0.6287747 0.9003312 0.6773105 0.6106193 0.7117928
# Cluster dissolution rate: the per-cluster "dissolved" counts reported in the
# km_stability printout (the lower the value, the better)
km_stability$bootbrd
## [1] 68 33 2 34 35 33